Setup

#install.packages('rtrek')
#install.packages('ggsci')
library(rtrek)
library(tidyverse) 
library(plotly)
library(ggsci)
# Read the Star Trek data set from the working directory.
trek_data <- read_csv("trek_data.csv")
# Move the four series to the front of the factor levels, in the order
# TOS, TNG, DS9, VOY, so that plots lay them out in that order.
trek_data[["Series"]] <- fct_relevel(
  trek_data[["Series"]],
  "TOS", "TNG", "DS9", "VOY"
)

1st Damn Fool Question:

Is there a difference in the distribution of number of characters per episode by series?

# Box plot of the number of characters per episode, one box per series,
# coloured with the ggsci Star Trek palette.
p <- plot_ly(
  trek_data,
  x = ~Series,
  y = ~NChars,
  color = ~Series,
  colors = pal_startrek("uniform")(4)
)
# The box trace inherits x, y and color from the plot_ly() call above.
p <- add_boxplot(p)
# The x axis already names each series, so the legend is hidden.
p <- hide_legend(p)
p <- layout(
  p,
  title = "Number of Characters Per Episode By Series",
  xaxis = list(title = "Series"),
  yaxis = list(title = "Number of Characters \n (per episode)")
)
p
## Warning: Ignoring 1 observations

Proportion of Episodes in which the Characters Are at Least 20% Female, by Series

library(dplyr)
library(plotly)

# Proportion of episodes, per series, in which at least `boundary` of the
# characters are female, shown as a grouped bar chart.
df <- read.csv("trek_data.csv")

# Minimum female share of an episode's characters for it to count.
boundary <- 0.2

# Series in the order they should appear on the x axis.
Series <- c("TOS", "TNG", "DS9", "VOY")

# Female share of the characters in each episode. The value is NA when
# either count is missing and NaN when both counts are 0.
female_share <- df$N_F / df$NChars

# Share of a series' episodes that reach the boundary. Episodes whose female
# share cannot be computed are left out of both the numerator and the
# denominator, so they are not silently counted as "Less Than 20% Female".
prop1 <- vapply(
  Series,
  function(series) {
    # %in% (rather than ==) so a missing series label never selects a row.
    mean(female_share[df$Series %in% series] >= boundary, na.rm = TRUE)
  },
  numeric(1),
  USE.NAMES = FALSE
)

# Complement: share of episodes below the boundary.
prop2 <- 1 - prop1

data <- data.frame(Series, prop1, prop2)

# width/height are given to plot_ly() because passing them to layout() is
# deprecated.
p <- plot_ly(
  data,
  x = ~Series,
  y = ~prop1,
  type = "bar",
  name = "At Least 20% Female",
  textfont = list(size = 13),
  width = 500,
  height = 500
) %>%
  add_trace(y = ~prop2, name = "Less Than 20% Female") %>%
  layout(
    autosize = FALSE,
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 10),
    title = paste0(
      "Proportion of Episodes in which \n",
      " the Characters Are at Least 20% Female, by Series \n"
    ),
    # Pin the bars to the order given in `Series` instead of relying on
    # plotly's default ordering of a character column.
    xaxis = list(categoryorder = "array", categoryarray = Series),
    yaxis = list(title = "Proportion of Episodes"),
    barmode = "group"
  )
p